clear

*** Data preparation do-file 
*** Reproduction script for the paper 
*** "How do researchers choose their goals of inference? A survey experiment on 
*** the effects of the state of research and method preferences on the choice 
*** between research goals"
*** Felix Bethke (ORCID: https://orcid.org/0000-0002-4259-6071)
*** Ingo Rohlfing (ORCID: https://orcid.org/0000-0001-8715-4771)

*** EU: convert every national Excel export into a Stata file that carries
*** a country identifier
foreach iso in BE CH DE DK NL SE UK NO IT FI {
    import excel "data/EU/`iso'full4.xlsx", firstrow clear
    gen country = "`iso'"
    * the three free-text columns are converted to string in every file
    * (presumably so that the national files can be appended -- confirm)
    tostring D3other M2other comments, replace
    save "data/`iso'full4.dta", replace
}

*** Stack the national files: start from Belgium and add the others in turn
use "data/BEfull4.dta", clear
foreach iso in CH DE DK NL SE UK NO IT FI {
    append using "data/`iso'full4.dta"
}
save "data/EUfull4.dta", replace

* Drop the columns that are not carried into the analysis data set
drop dataprot* *Time* M3* submitdate startlanguage sec* Eligibility ///
     vig1out1 vig3out1 vig4out1 Email comments vig2a vig2b ///
     vig1a vig1b vig1c vig1d vig1e vig1f vig1g vig1h vig1out2 ///
     vig1aV2 vig1bV2 vig1cV2 vig1dV2 vig1eV2 vig1fV2 vig1gV2 vig1hV2 vig1out1V2 ///
     vig3a vig3b vig3out2SQ001 vig3out2SQ003 vig3out2SQ002 vig3aV2 vig3bV2 vig3out1V2 ///
     vig4a vig4b vig4out2SQ001 vig4out2SQ002 vig4aV2 vig4bV2 vig4out1V2

**rename variables
* demographics
rename (D1 D2 D3 D3other) (sex age status statusother)
* attention-check items (the export has no item SQ004)
rename (attentionSQ001 attentionSQ002 attentionSQ003 attentionSQ005 attentionSQ006) ///
       (attention1 attention2 attention3 attention4 attention5)
* method questions
rename (M1 M2SQ001 M2SQ002 M2SQ003 M2SQ004 M2SQ005 M2other) ///
       (numberofcases stats QCA process interpret experiment othermethod)

*recode age
* Reported ages outside 18-100 are treated as invalid and replaced by the mean
* of the valid ages; respondents without any reported age stay missing.
gen age2 = age if inrange(age, 18, 100)
egen agemean = mean(age2)
replace age2 = agemean if age2 == . & age != .
drop age
rename age2 age
label variable age "age"

*label randomized treatments
* rand1-rand4 hold the randomly assigned condition of vignettes 1-4
label define randvig1 ///
    1 "small N, binary FI, binary SS" ///
    2 "large N, binary FI, binary SS" ///
    3 "small N, binary FI, continuous SS" ///
    4 "large N, binary FI, continuous SS" ///
    5 "small N, continuous FI, binary SS" ///
    6 "large N, continuous FI, binary SS" ///
    7 "small N, continuous FI, continuous SS " ///
    8 "large N, continuous FI, continuous SS"
label define randvig2 1 "extensively studied" 2 "not extensively studied"
label define randvig3 1 "small N" 2 "large N"
label define randvig4 1 "small N" 2 "large N"
forvalues v = 1/4 {
    label values rand`v' randvig`v'
}

*label manipulation checks
label var Manipulate1 "Y-centric"
label var Manipulate2 "large N"
label var Manipulate3 "measurement level"

*label attention checks
label var attention1 "Globalization and welfare state"
label var attention2 "Regime form and interstate war"
label var attention3 "Economic development and democracy"
label var attention4 "Civil society and elections"
label var attention5 "Climate change and economic development"

*label culture variables
label var numberofcases "number of cases"
label var stats "Statistical analysis"
label var QCA "QCA"
label var process "Process Tracing"
label var interpret "Interpretivist qualitative methods"
label var experiment "Experiments"
label var othermethod "Other"

*relabel outcome variables
***Vignette1
* One 0/1 indicator per closed-ended statement of vignette 1:
*vig1opt1: "If foreign investment in a country changes from low to high, it gets more likely that social spending is high"
*vig1opt2: "If foreign investment in a country is high, then social spending is high"
*vig1opt3: "If foreign investment in a country changes from low to high, it gets more likely that social spending increases"
*vig1opt4: "As foreign investment increases, it gets more likely that social spending changes from low to high and as foreign investment decreases, it gets more likely that social spending changes from high to low"
*vig1opt5: "If foreign investment in a country is high, then social spending is high and if foreign investment in a country is low, then social spending is low"
*vig1opt6: "As foreign investment increases, it gets more likely that social spending increases and as foreign investment decreases, it gets more likely that social spending decreases"
*vig1opt7: "If foreign investment in a country is low, then social spending is low"
*
* Statement k is stored in survey item vig1out2V2SQ00(k+1). A statement is
* coded 1 when it was ticked ("Yes") and 0 when it was not ticked by a
* respondent whose lastpage is above 4.
* Stata treats a missing value as larger than any number, so "lastpage>4" on
* its own is also true when lastpage is missing; the explicit !missing() guard
* keeps respondents without a recorded last page at missing instead of 0.
forvalues k = 1/7 {
    local item = `k' + 1
    gen vig1opt`k' = .
    replace vig1opt`k' = 1 if vig1out2V2SQ00`item' == "Yes"
    replace vig1opt`k' = 0 if lastpage > 4 & !missing(lastpage) & vig1opt`k' == .
}
drop vig1out2V2SQ*

**Vignette2
* Numeric outcome for vignette 2. Both the full answer text and the short
* codes A1-A3 are accepted as input; anything else stays missing.
gen vig2outcome = .
replace vig2outcome = 1 if inlist(vig2out2, "A1", "You conduct an exploratory analysis to identify the causes of the phenomenon")
replace vig2outcome = 2 if inlist(vig2out2, "A2", "You conduct a confirmatory study to evaluate how particular factors on average influence the phenomenon")
replace vig2outcome = 3 if inlist(vig2out2, "A3", "You conduct a confirmatory study that traces the process of how particular factors influence the phenomenon within individual cases")
label define vig2outcome 1 "exploratory" 2 "confirmatory" 3 "process"
label values vig2outcome vig2outcome
drop vig2out2

**Vignette3
* One indicator per statement: 1 = "Yes", 0 = "No", missing otherwise
* (including "N/A").
*vig3opt1: "Democratic dyad is a sufficient condition for interstate peace"
*vig3opt2: "Nondemocratic dyad is a necessary condition for war between states"
*vig3opt3: "If two states are democratic, interstate war between them is less likely"
forvalues k = 1/3 {
    gen vig3opt`k' = cond(vig3out2V2SQ00`k' == "Yes", 1, cond(vig3out2V2SQ00`k' == "No", 0, .))
}
drop vig3out2V2SQ*

**Vignette4
* One indicator per statement: 1 = "Yes", 0 = "No", missing otherwise
* (including "N/A").
*vig4opt1: "Economic development is necessary to achieve democracy"
*vig4opt2: "The level of economic development is positively related to the level of democracy"
forvalues k = 1/2 {
    gen vig4opt`k' = cond(vig4out2V2SQ00`k' == "Yes", 1, cond(vig4out2V2SQ00`k' == "No", 0, .))
}
drop vig4out2V2SQ*

* overwrite the stacked EU file with the cleaned version
save "data/EUfull4.dta", replace

**USA
* Read the US export and tag every row with the country code
import excel "data/USA/USfull5.xlsx", firstrow clear
gen country = "USA"

* Drop the columns that are not carried into the analysis data set
drop *Time* M3* submitdate startlanguage sec* Eligibility ///
     vig1out1 vig3out1 vig4out1 Email comments vig2a vig2b ///
     vig1a vig1b vig1c vig1d vig1e vig1f vig1g vig1h vig1out2 ///
     vig1aV2 vig1bV2 vig1cV2 vig1dV2 vig1eV2 vig1fV2 vig1gV2 vig1hV2 vig1out1V2 ///
     vig3a vig3b vig3out2SQ001 vig3out2SQ003 vig3out2SQ002 vig3aV2 vig3bV2 vig3out1V2 ///
     vig4a vig4b vig4out2SQ001 vig4out2SQ002 vig4aV2 vig4bV2 vig4out1V2

**rename variables
* demographics
rename (D1 D2 D3 D3other) (sex age status statusother)
* attention-check items (the export has no item SQ004)
rename (attentionSQ001 attentionSQ002 attentionSQ003 attentionSQ005 attentionSQ006) ///
       (attention1 attention2 attention3 attention4 attention5)
* method questions
rename (M1 M2SQ001 M2SQ002 M2SQ003 M2SQ004 M2SQ005 M2other) ///
       (numberofcases stats QCA process interpret experiment othermethod)

*recode age
* age is a string here; non-numeric entries become missing through "force".
* Values outside 18-100 are treated as invalid, and every respondent who typed
* something into the age field but has no valid age receives the mean of the
* valid ages. An empty age field stays missing.
destring age, generate(age2) force
replace age2 = . if !inrange(age2, 18, 100)
egen agemean = mean(age2)
replace age2 = agemean if age2 == . & age != ""
drop age
rename age2 age
label variable age "age"

*label randomized treatments
* rand1-rand4 hold the randomly assigned condition of vignettes 1-4
label define randvig1 ///
    1 "small N, binary FI, binary SS" ///
    2 "large N, binary FI, binary SS" ///
    3 "small N, binary FI, continuous SS" ///
    4 "large N, binary FI, continuous SS" ///
    5 "small N, continuous FI, binary SS" ///
    6 "large N, continuous FI, binary SS" ///
    7 "small N, continuous FI, continuous SS " ///
    8 "large N, continuous FI, continuous SS"
label define randvig2 1 "extensively studied" 2 "not extensively studied"
label define randvig3 1 "small N" 2 "large N"
label define randvig4 1 "small N" 2 "large N"
forvalues v = 1/4 {
    label values rand`v' randvig`v'
}

*label manipulation checks
label var Manipulate1 "Y-centric"
label var Manipulate2 "large N"
label var Manipulate3 "measurement level"

*label attention checks
label var attention1 "Globalization and welfare state"
label var attention2 "Regime form and interstate war"
label var attention3 "Economic development and democracy"
label var attention4 "Civil society and elections"
label var attention5 "Climate change and economic development"

*label culture variables
label var numberofcases "number of cases"
label var stats "Statistical analysis"
label var QCA "QCA"
label var process "Process Tracing"
label var interpret "Interpretivist qualitative methods"
label var experiment "Experiments"
label var othermethod "Other"

*relabel outcome variables
***Vignette1
* One 0/1 indicator per closed-ended statement of vignette 1:
*vig1opt1: "If foreign investment in a country changes from low to high, it gets more likely that social spending is high"
*vig1opt2: "If foreign investment in a country is high, then social spending is high"
*vig1opt3: "If foreign investment in a country changes from low to high, it gets more likely that social spending increases"
*vig1opt4: "As foreign investment increases, it gets more likely that social spending changes from low to high and as foreign investment decreases, it gets more likely that social spending changes from high to low"
*vig1opt5: "If foreign investment in a country is high, then social spending is high and if foreign investment in a country is low, then social spending is low"
*vig1opt6: "As foreign investment increases, it gets more likely that social spending increases and as foreign investment decreases, it gets more likely that social spending decreases"
*vig1opt7: "If foreign investment in a country is low, then social spending is low"
*
* Statement k is stored in survey item vig1out2V2SQ00(k+1). A statement is
* coded 1 when it was ticked ("Yes") and 0 when it was not ticked by a
* respondent whose lastpage is above 4.
* Stata treats a missing value as larger than any number, so "lastpage>4" on
* its own is also true when lastpage is missing; the explicit !missing() guard
* keeps respondents without a recorded last page at missing instead of 0.
forvalues k = 1/7 {
    local item = `k' + 1
    gen vig1opt`k' = .
    replace vig1opt`k' = 1 if vig1out2V2SQ00`item' == "Yes"
    replace vig1opt`k' = 0 if lastpage > 4 & !missing(lastpage) & vig1opt`k' == .
}
drop vig1out2V2*

**Vignette2
* Numeric outcome for vignette 2. Both the full answer text and the short
* codes A1-A3 are accepted as input; anything else stays missing.
gen vig2outcome = .
replace vig2outcome = 1 if inlist(vig2out2, "A1", "You conduct an exploratory analysis to identify the causes of the phenomenon")
replace vig2outcome = 2 if inlist(vig2out2, "A2", "You conduct a confirmatory study to evaluate how particular factors on average influence the phenomenon")
replace vig2outcome = 3 if inlist(vig2out2, "A3", "You conduct a confirmatory study that traces the process of how particular factors influence the phenomenon within individual cases")
label define vig2outcome 1 "exploratory" 2 "confirmatory" 3 "process"
label values vig2outcome vig2outcome
drop vig2out2

**Vignette3
* One indicator per statement: 1 = "Yes", 0 = "No", missing otherwise
* (including "N/A").
*vig3opt1: "Democratic dyad is a sufficient condition for interstate peace"
*vig3opt2: "Nondemocratic dyad is a necessary condition for war between states"
*vig3opt3: "If two states are democratic, interstate war between them is less likely"
forvalues k = 1/3 {
    gen vig3opt`k' = cond(vig3out2V2SQ00`k' == "Yes", 1, cond(vig3out2V2SQ00`k' == "No", 0, .))
}
drop vig3out2V2*

**Vignette4: Note that the ordering of outcome statement is different for EU and US
*vig4opt1: "Economic development is necessary to achieve democracy"
*vig4opt2: "The level of economic development is positively related to the level of democracy"
*
* A statement is coded 1 when it was ticked ("Yes") and 0 when it was not
* ticked by a respondent whose lastpage is above 7.
* Stata treats a missing value as larger than any number, so "lastpage>7" on
* its own is also true when lastpage is missing; the explicit !missing() guard
* keeps respondents without a recorded last page at missing instead of 0.
forvalues k = 1/2 {
    gen vig4opt`k' = .
    replace vig4opt`k' = 1 if vig4out2V2SQ00`k' == "Yes"
    replace vig4opt`k' = 0 if lastpage > 7 & !missing(lastpage) & vig4opt`k' == .
}
drop vig4out2V2SQ*

save "data/USAfull4.dta", replace


***combine data
* Add the cleaned EU file to the data in memory; mark is 0 for the rows that
* were already in memory and 1 for the appended EU rows.
append using "data/EUfull4.dta", generate(mark)
save "data/fulldata.dta", replace

* Convert both spreadsheets with the coded open-ended answers to Stata format;
* the US sheet is read last and is therefore the one the EU sheet is appended to.
foreach region in EU US {
    import excel "data/textdata/`region'textdata.xls", sheet("Sheet1") firstrow clear
    save "data/`region'textdata.dta", replace
}
append using "data/EUtextdata.dta"
save "data/textdata.dta", replace

* Attach the text codings to the survey data by respondent (country + id)
use "data/fulldata.dta", clear
merge 1:1 country id using "data/textdata.dta"
drop _merge
save "data/fulldata.dta", replace

***Prepare coding of "number of cases"
* numberofcases2 is the numeric version of the answer; answers that are not
* plain numbers become missing and are exported for manual coding.
use "data/fulldata.dta", clear
destring numberofcases, generate(numberofcases2) force
export excel country id numberofcases using "data/numberofcases.xls" if numberofcases2 == ., firstrow(variables) replace
save "data/fulldata.dta", replace

**import coded data
import excel "data/numberofcasescoded.xls", sheet("Sheet1") firstrow clear
rename numberofcases_coded casescoded

*final adjustments // 1: small-n, 2: large-n, 3: mixed-method
*Note that answers of the kind "it depends on the research question" were coded as missing
* Manual overrides for individual respondents, grouped by country and code
replace casescoded = 2 if country == "USA" & inlist(id, 520, 200, 223, 297, 477, 162, 144, 777, 495, 343, 213)
replace casescoded = 1 if country == "USA" & id == 668
replace casescoded = 2 if country == "DE"  & inlist(id, 91, 142)
replace casescoded = 3 if country == "UK"  & id == 323
replace casescoded = 2 if country == "UK"  & inlist(id, 277, 153, 112)
replace casescoded = 2 if country == "CH"  & id == 124
replace casescoded = 2 if country == "SE"  & inlist(id, 33, 75)
replace casescoded = 1 if country == "IT"  & id == 42
replace casescoded = 3 if country == "NL"  & id == 89

* keep only the coded category, under the original variable name
drop numberofcases
rename casescoded numberofcases
save "data/numberofcasescoded.dta", replace

*combine numeric & coded data
* Replace the raw text answer by the manually coded category
use "data/fulldata.dta", clear
drop numberofcases
merge 1:1 country id using "data/numberofcasescoded.dta"
drop _merge

****Define categorical variable for number of cases
* Numeric answers set the category directly: above 50 is large N, otherwise small N
replace numberofcases = cond(numberofcases2 > 50, 2, 1) if numberofcases2 != .
label define numberofcases 1 "small N" 2 "large N" 3 "mixed-method" 99 "not applicable"
label values numberofcases numberofcases
save "data/fulldata.dta", replace


***Prepare coding of "othermethod"
* Export the free-text "other method" answers for manual coding, then swap the
* raw text for the coded file.
use "data/fulldata.dta", clear
export excel country id othermethod using "data/othermethod.xls" if othermethod != "", firstrow(variables) replace
drop othermethod
save "data/fulldata.dta", replace

import excel "data/othermethodcoded.xls", sheet("Sheet1") firstrow clear
save "data/othermethodcoded.dta", replace

use "data/fulldata.dta", clear
merge 1:1 country id using "data/othermethodcoded.dta"
drop _merge

**Define categorical variable for method preferences
* Flags are 1 when the condition holds and missing otherwise.
* othermethodcat is a string code: "1" counts towards qualmeth, "2" towards
* quantmeth, "3" towards mixedmeth.
gen qualmeth  = 1 if QCA == "Yes" | process == "Yes" | interpret == "Yes" | othermethodcat == "1"
gen quantmeth = 1 if stats == "Yes" | experiment == "Yes" | othermethodcat == "2"
gen mixedmeth = 1 if (qualmeth == 1 & quantmeth == 1) | othermethodcat == "3"

* 1 = only qualmeth set, 2 = only quantmeth set, 3 = both or mixedmeth set
gen methodcat = 1 if qualmeth == 1 & quantmeth == . & mixedmeth == .
replace methodcat = 2 if qualmeth == . & quantmeth == 1 & mixedmeth == .
replace methodcat = 3 if (qualmeth == 1 & quantmeth == 1) | mixedmeth == 1
* NOTE(review): the label texts below match those of numberofcases
* ("small N"/"large N") although methodcat is built from method preferences --
* confirm that this wording is intended.
label define methodcat 1 "small N" 2 "large N" 3 "mixed-method" 99 "not applicable"
label values methodcat methodcat
save "data/fulldata.dta", replace

***Prepare coding of "status"
* Export the free-text status answers for manual coding and merge the coded
* file back in.
use "data/fulldata.dta", clear
export excel country id statusother using "data/statusother.xls" if statusother != "", firstrow(variables) replace

import excel "data/statusothercoded.xls", sheet("Sheet1") firstrow clear
save "data/statusothercoded.dta", replace

use "data/fulldata.dta", clear
merge 1:1 country id using "data/statusothercoded.dta"
drop _merge

***Define categorical status variable
* statuscat starts from the manual codes; the closed answer categories then
* set it for everybody who picked one of them.
destring statuscat, replace force
replace statuscat = 1 if inlist(status, "Associate Professor", "Full Professor")
replace statuscat = 2 if inlist(status, "Postdoc", "Assistant Professor")
replace statuscat = 3 if status == "Graduate Student"
label define statuscat 1 "Professor" 2 "Senior Scholar" 3 "Junior Scholar" 99 "not applicable"
label values statuscat statuscat
save "data/fulldata.dta", replace

***Define categorical variable for research cultures
* 1 and 2 require method preference and number of cases to agree; category 3
* is assigned last, so it wins whenever either variable equals 3.
gen culture = 1 if methodcat == 1 & numberofcases == 1
replace culture = 2 if methodcat == 2 & numberofcases == 2
replace culture = 3 if methodcat == 3 | numberofcases == 3
label define culture 1 "Qualitative" 2 "Quantitative" 3 "Mixed-method" 99 "not applicable"
label values culture culture
save "data/fulldata.dta", replace

***encode string variables
encode country, gen(countrynew)
encode sex, gen(gender)
label variable gender "gender"
* (encoding of the attention checks is disabled)
*encode attention1, gen(attention1n)
*encode attention2, gen(attention2n)
*encode attention3, gen(attention3n)
*encode attention4, gen(attention4n)
*encode attention5, gen(attention5n)

* Numeric manipulation checks: "Yes" = 1, "N/A" = missing, anything else = 0
label define manipulate 0 "No" 1 "Yes"
forvalues m = 1/3 {
    gen Manipulate`m'n = cond(Manipulate`m' == "Yes", 1, cond(Manipulate`m' == "N/A", ., 0))
    label values Manipulate`m'n manipulate
}

label variable Manipulate1n "Y-centric"
label variable Manipulate2n "large N"
label variable Manipulate3n "measurement level"

***generate categorical variables from codings of open-ended questions
* The letter codes of vignettes 1, 3 and 4 are turned into numbers
* (s = 1, c = 2, b = 3, u = 99; anything else missing) and two 0/1 indicators
* are derived from them.
label define set 0 "not set-theoretical" 1 "set-theoretical" 
label define corr 0 "not correlational" 1 "correlational" 
label define vigtxt 1 "set-theoretical" 2 "correlational" 3 "both" 99 "unclear"

foreach v in 1 3 4 {
    *vig`v': numeric version replaces the letter-coded variable
    gen vig`v'txtn = cond(vig`v'txt == "s", 1, cond(vig`v'txt == "c", 2, cond(vig`v'txt == "b", 3, cond(vig`v'txt == "u", 99, .))))
    drop vig`v'txt
    rename vig`v'txtn vig`v'txt
    label values vig`v'txt vigtxt

    * indicators are 0 for every other code, including missing
    gen vig`v'set = (vig`v'txt == 1)
    label values vig`v'set set
    gen vig`v'corr = (vig`v'txt == 2)
    label values vig`v'corr corr
}

save "data/fulldata.dta", replace

*gen dummy variables for experiment 2
* rand1 encodes the three factors of vignette 1 in a single 1-8 code; each
* factor is split out as a 0/1 indicator (0 also when rand1 is missing).
label define largeN 0 "small N" 1 "large N" 
label define causemeasure 0 "binary cause" 1 "continous cause" 
label define effectmeasure 0 "binary effect" 1 "continous effect" 

* even codes are the large-N conditions
gen vig1largeN = inlist(rand1, 2, 4, 6, 8)
label values vig1largeN largeN

* codes 5-8 use a continuous measure of foreign investment (the cause)
gen vig1continouscause = inlist(rand1, 5, 6, 7, 8)
label values vig1continouscause causemeasure

* codes 3, 4, 7 and 8 use a continuous measure of social spending (the outcome)
gen vig1continousoutcome = inlist(rand1, 3, 4, 7, 8)
label values vig1continousoutcome effectmeasure


*Statements vig1opt2, vig1opt5 and vig1opt7 indicate a set-theoretic perspective on causality,
*whereas statements vig1opt1, vig1opt3, vig1opt4 and vig1opt6 imply correlational causal reasoning.
* indicators: at least one statement of the respective kind was chosen
gen vig1closedset = (vig1opt2 == 1 | vig1opt5 == 1 | vig1opt7 == 1)
gen vig1closedcorr = (vig1opt1 == 1 | vig1opt3 == 1 | vig1opt4 == 1 | vig1opt6 == 1)

*generate count variables of the number of set or correlation answers for experiment 2
* (missing as soon as one of the summed statements is missing)
gen vig1closedsetcount = vig1opt2 + vig1opt5 + vig1opt7
gen vig1closedcorrcount = vig1opt1 + vig1opt3 + vig1opt4 + vig1opt6

*generate preference variable: positive = more set-theoretic than correlational statements chosen
gen vig1pref = vig1closedsetcount - vig1closedcorrcount

*label statement variables and generate alternative variable names
label var vig1opt1 "corr. statement 1"
label var vig1opt2 "set statement 1"
label var vig1opt3 "corr. statement 2"
label var vig1opt4 "corr. statement 3"
label var vig1opt5 "set statement 2"
label var vig1opt6 "corr. statement 4"
label var vig1opt7 "set statement 3"

gen corr1 = vig1opt1
gen corr2 = vig1opt3
gen corr3 = vig1opt4
gen corr4 = vig1opt6
gen set1 = vig1opt2
gen set2 = vig1opt5
gen set3 = vig1opt7

**generate age groups defined by APSA
* cut() assigns missing to values equal to or above the last break. The last
* break is therefore 101, not 100, so that an age of exactly 100 lands in the
* top group (75 and above). The group labels are built from the lower bounds
* and are not affected by this.
egen agecat = cut(age), at(0,25,35,45,55,65,75,101) label

*gen EU/US groups
* numeric, labelled indicator: every country other than "USA" counts as EU
gen continent = cond(country == "USA", "US", "EU")
encode continent, gen(continent2)
drop continent
rename continent2 continent

*gen answer combinations for vig3
* label3 keeps the concatenated 0/1 answers as a string.
egen label3 = concat(vig3opt?) if vig3opt1 !=.
* The combination code is computed directly from the three indicators
* (1 + 4*sufficient + 2*necessary + likely) instead of numbering the observed
* strings: numbering only matches the fixed value labels below when all eight
* combinations occur and no answer is partially missing. The code is missing
* whenever one of the three answers is missing.
gen group3 = 1 + 4*vig3opt1 + 2*vig3opt2 + vig3opt3
label define vig3outcat 1 "not sufficient, not necessary, not likely" 2 "not sufficient, not necessary, likely" 3 "not sufficient, necessary, not likely" 4 "not sufficient, necessary, likely" 5 "sufficient, not necessary, not likely" 6 "sufficient, not necessary, likely" 7 "sufficient, necessary, not likely" 8 "sufficient, necessary, likely" 
label values group3 vig3outcat  
decode group3, gen(vig3outcat)
*gen dummies for answer combinations
tab vig3outcat, gen(vig3outcatdum)

*gen answer combinations for vig4
* label4 keeps the concatenated 0/1 answers as a string.
egen label4 = concat(vig4opt?) if vig4opt1 !=.
* Same approach as for vig3: 1 + 2*set-theoretic + correlational, missing
* whenever one of the two answers is missing.
gen group4 = 1 + 2*vig4opt1 + vig4opt2
label define vig4outcat 1 "not set-theoretic, not correlational" 2 "not set-theoretic, correlational" 3 "set-theoretic, not correlational" 4 "set-theoretic, correlational" 
label values group4 vig4outcat  
decode group4, gen(vig4outcat)
*gen dummies for answer combinations
tab vig4outcat, gen(vig4outcatdum)


*gen multinomial outcome measures from open-ended questions
*exclude open-ended answers that feature both correlational and set-theoretical reasoning 
* vigXtxt2 is a copy of vigXtxt with code 3 ("both") set to missing
foreach v in 1 3 4 {
    gen vig`v'txt2 = vig`v'txt if vig`v'txt != 3
}

* Map the remaining codes explicitly (1 -> 1, 2 -> 2, 99 -> 3). Numbering the
* observed values instead would only match the fixed labels below if all three
* categories occur in the data.
foreach v in 1 3 4 {
    gen vig`v'open = cond(vig`v'txt2 == 99, 3, vig`v'txt2)
}

label define vigopen 1 "set-theoretical" 2 "correlational" 3 "unclear"
foreach v in 1 3 4 {
    label values vig`v'open vigopen
}

save "data/fulldata.dta", replace

***import data on response rates
* Convert the response-rate table (first row = variable names, UTF-8) to Stata format
import delimited using "data/responserate.csv", ///
    varnames(1) encoding(utf8) clear
save "data/responserate.dta", replace
